Description

This is an exploratory data analysis for educational purposes. The analysis is carried out with data related to COVID-19 reported by various countries. The data is obtained from the Website: https://ourworldindata.org/coronavirus-source-data. The site’s data set is constantly updated. The results of the analysis changes according to the day the code is executed.

Data reading

The data set is loaded directly from the Website to obtain information updated according to the execution day of the code.

covid.df <- read.csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")

Required packages are loaded.

library(ggplot2)
library(knitr)
library(reshape2)
library(kableExtra)
#library(dplyr)
library(tidyverse)
library(lubridate)
library(gmodels)
plotscaption <- "oscarcastrolopez.github.io"

Display of the dataframe structure with its variables and their type.

str(covid.df)
## 'data.frame':    37194 obs. of  36 variables:
##  $ iso_code                       : Factor w/ 212 levels "","ABW","AFG",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ continent                      : Factor w/ 7 levels "","Africa","Asia",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ location                       : Factor w/ 212 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ date                           : Factor w/ 230 levels "2019-12-31","2020-01-01",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ total_cases                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ new_cases                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ total_deaths                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ new_deaths                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ total_cases_per_million        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ new_cases_per_million          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ total_deaths_per_million       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ new_deaths_per_million         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ new_tests                      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_tests                    : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ total_tests_per_thousand       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_per_thousand         : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_smoothed             : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ new_tests_smoothed_per_thousand: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tests_per_case                 : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ positive_rate                  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ tests_units                    : Factor w/ 6 levels "","people tested",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ stringency_index               : num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ population                     : num  38928341 38928341 38928341 38928341 38928341 ...
##  $ population_density             : num  54.4 54.4 54.4 54.4 54.4 ...
##  $ median_age                     : num  18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 18.6 ...
##  $ aged_65_older                  : num  2.58 2.58 2.58 2.58 2.58 ...
##  $ aged_70_older                  : num  1.34 1.34 1.34 1.34 1.34 ...
##  $ gdp_per_capita                 : num  1804 1804 1804 1804 1804 ...
##  $ extreme_poverty                : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ cardiovasc_death_rate          : num  597 597 597 597 597 ...
##  $ diabetes_prevalence            : num  9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 9.59 ...
##  $ female_smokers                 : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ male_smokers                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ handwashing_facilities         : num  37.7 37.7 37.7 37.7 37.7 ...
##  $ hospital_beds_per_thousand     : num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
##  $ life_expectancy                : num  64.8 64.8 64.8 64.8 64.8 ...

A detailed description of each variable of the data set can be found on the following Website: https://github.com/owid/covid-19-data/blob/master/public/data/owid-covid-codebook.csv.

The variable date is class factor is converted to class date.

covid.df$date <- as.Date(covid.df$date, format("%Y-%m-%d"))

List of countries of the data set.

levels(covid.df$location)
##   [1] "Afghanistan"                      "Albania"                         
##   [3] "Algeria"                          "Andorra"                         
##   [5] "Angola"                           "Anguilla"                        
##   [7] "Antigua and Barbuda"              "Argentina"                       
##   [9] "Armenia"                          "Aruba"                           
##  [11] "Australia"                        "Austria"                         
##  [13] "Azerbaijan"                       "Bahamas"                         
##  [15] "Bahrain"                          "Bangladesh"                      
##  [17] "Barbados"                         "Belarus"                         
##  [19] "Belgium"                          "Belize"                          
##  [21] "Benin"                            "Bermuda"                         
##  [23] "Bhutan"                           "Bolivia"                         
##  [25] "Bonaire Sint Eustatius and Saba"  "Bosnia and Herzegovina"          
##  [27] "Botswana"                         "Brazil"                          
##  [29] "British Virgin Islands"           "Brunei"                          
##  [31] "Bulgaria"                         "Burkina Faso"                    
##  [33] "Burundi"                          "Cambodia"                        
##  [35] "Cameroon"                         "Canada"                          
##  [37] "Cape Verde"                       "Cayman Islands"                  
##  [39] "Central African Republic"         "Chad"                            
##  [41] "Chile"                            "China"                           
##  [43] "Colombia"                         "Comoros"                         
##  [45] "Congo"                            "Costa Rica"                      
##  [47] "Cote d'Ivoire"                    "Croatia"                         
##  [49] "Cuba"                             "Curacao"                         
##  [51] "Cyprus"                           "Czech Republic"                  
##  [53] "Democratic Republic of Congo"     "Denmark"                         
##  [55] "Djibouti"                         "Dominica"                        
##  [57] "Dominican Republic"               "Ecuador"                         
##  [59] "Egypt"                            "El Salvador"                     
##  [61] "Equatorial Guinea"                "Eritrea"                         
##  [63] "Estonia"                          "Ethiopia"                        
##  [65] "Faeroe Islands"                   "Falkland Islands"                
##  [67] "Fiji"                             "Finland"                         
##  [69] "France"                           "French Polynesia"                
##  [71] "Gabon"                            "Gambia"                          
##  [73] "Georgia"                          "Germany"                         
##  [75] "Ghana"                            "Gibraltar"                       
##  [77] "Greece"                           "Greenland"                       
##  [79] "Grenada"                          "Guam"                            
##  [81] "Guatemala"                        "Guernsey"                        
##  [83] "Guinea"                           "Guinea-Bissau"                   
##  [85] "Guyana"                           "Haiti"                           
##  [87] "Honduras"                         "Hong Kong"                       
##  [89] "Hungary"                          "Iceland"                         
##  [91] "India"                            "Indonesia"                       
##  [93] "International"                    "Iran"                            
##  [95] "Iraq"                             "Ireland"                         
##  [97] "Isle of Man"                      "Israel"                          
##  [99] "Italy"                            "Jamaica"                         
## [101] "Japan"                            "Jersey"                          
## [103] "Jordan"                           "Kazakhstan"                      
## [105] "Kenya"                            "Kosovo"                          
## [107] "Kuwait"                           "Kyrgyzstan"                      
## [109] "Laos"                             "Latvia"                          
## [111] "Lebanon"                          "Lesotho"                         
## [113] "Liberia"                          "Libya"                           
## [115] "Liechtenstein"                    "Lithuania"                       
## [117] "Luxembourg"                       "Macedonia"                       
## [119] "Madagascar"                       "Malawi"                          
## [121] "Malaysia"                         "Maldives"                        
## [123] "Mali"                             "Malta"                           
## [125] "Mauritania"                       "Mauritius"                       
## [127] "Mexico"                           "Moldova"                         
## [129] "Monaco"                           "Mongolia"                        
## [131] "Montenegro"                       "Montserrat"                      
## [133] "Morocco"                          "Mozambique"                      
## [135] "Myanmar"                          "Namibia"                         
## [137] "Nepal"                            "Netherlands"                     
## [139] "New Caledonia"                    "New Zealand"                     
## [141] "Nicaragua"                        "Niger"                           
## [143] "Nigeria"                          "Northern Mariana Islands"        
## [145] "Norway"                           "Oman"                            
## [147] "Pakistan"                         "Palestine"                       
## [149] "Panama"                           "Papua New Guinea"                
## [151] "Paraguay"                         "Peru"                            
## [153] "Philippines"                      "Poland"                          
## [155] "Portugal"                         "Puerto Rico"                     
## [157] "Qatar"                            "Romania"                         
## [159] "Russia"                           "Rwanda"                          
## [161] "Saint Kitts and Nevis"            "Saint Lucia"                     
## [163] "Saint Vincent and the Grenadines" "San Marino"                      
## [165] "Sao Tome and Principe"            "Saudi Arabia"                    
## [167] "Senegal"                          "Serbia"                          
## [169] "Seychelles"                       "Sierra Leone"                    
## [171] "Singapore"                        "Sint Maarten (Dutch part)"       
## [173] "Slovakia"                         "Slovenia"                        
## [175] "Somalia"                          "South Africa"                    
## [177] "South Korea"                      "South Sudan"                     
## [179] "Spain"                            "Sri Lanka"                       
## [181] "Sudan"                            "Suriname"                        
## [183] "Swaziland"                        "Sweden"                          
## [185] "Switzerland"                      "Syria"                           
## [187] "Taiwan"                           "Tajikistan"                      
## [189] "Tanzania"                         "Thailand"                        
## [191] "Timor"                            "Togo"                            
## [193] "Trinidad and Tobago"              "Tunisia"                         
## [195] "Turkey"                           "Turks and Caicos Islands"        
## [197] "Uganda"                           "Ukraine"                         
## [199] "United Arab Emirates"             "United Kingdom"                  
## [201] "United States"                    "United States Virgin Islands"    
## [203] "Uruguay"                          "Uzbekistan"                      
## [205] "Vatican"                          "Venezuela"                       
## [207] "Vietnam"                          "Western Sahara"                  
## [209] "World"                            "Yemen"                           
## [211] "Zambia"                           "Zimbabwe"

List of continents of the data set.

levels(covid.df$continent)
## [1] ""              "Africa"        "Asia"          "Europe"       
## [5] "North America" "Oceania"       "South America"

Máximum number of days that the data set comprises.

maxdays <- max(table(covid.df$location))
print(paste("Maximum reported days:",maxdays))
## [1] "Maximum reported days: 230"

Countries reporting data of the maximum days of the data set.

names(table(covid.df$location)[table(covid.df$location)==maxdays])
##  [1] "Australia"            "Austria"              "Belarus"             
##  [4] "Belgium"              "Brazil"               "Canada"              
##  [7] "China"                "Croatia"              "Czech Republic"      
## [10] "Denmark"              "Estonia"              "Finland"             
## [13] "France"               "Germany"              "Greece"              
## [16] "Iceland"              "Iran"                 "Israel"              
## [19] "Italy"                "Japan"                "Lithuania"           
## [22] "Luxembourg"           "Malaysia"             "Mexico"              
## [25] "Nepal"                "Netherlands"          "Norway"              
## [28] "Russia"               "Singapore"            "South Korea"         
## [31] "Sweden"               "Switzerland"          "Taiwan"              
## [34] "Thailand"             "United Arab Emirates" "United Kingdom"      
## [37] "United States"        "Vietnam"              "World"

Start and end dates of the data set.

startdate <- min(covid.df$date)
enddate <- max(covid.df$date)
todayformatted <- format(Sys.Date(), "%A, %B %d, %Y")
todayformatted <- paste(toupper(substr(todayformatted, 1, 1)), 
                        substr(todayformatted, 2, nchar(todayformatted)), 
                        sep="")
print(paste("Starts:", format(startdate, "%A, %B, %d of %Y"), 
            "Ends:",  format(enddate, "%A, %B, %d of %Y")))
## [1] "Starts: Tuesday, December, 31 of 2019 Ends: Sunday, August, 16 of 2020"

Rankings of countries with the variables of totals

From the original dataframe, only the rows where date is equal to the maximum date (or compilation date) are obtained. Using this filter only the updated totals or accumulated values of each country are obtained. Additionally, countries with less than a million inhabitants are discarded. Some countries with small population have very high statistics per million of inhabitants.

The rankings are created with the following variables:

# Get one row of each country with the updated totals
covid.total.df <- covid.df[covid.df$date== enddate,]
# Filter countries with less than 1 millón population
covid.total.df <- covid.total.df[covid.total.df$population >= 1000000,]
covid.total.df$location <- droplevels.factor(covid.total.df$location)
print(names(covid.total.df)[c(5,7,9,11)])
## [1] "total_cases"              "total_deaths"            
## [3] "total_cases_per_million"  "total_deaths_per_million"

After discarding the countries that have less than a million inhabitants, the data set now has: 156 countries.

For each of the variables with total values, the 20 countries with the highest and lowest values are obtained. If México is not included in the top or bottom 20 it is added to the list indicating its ranking position. For each variable to be analyzed, the following is done in R:

  1. The data is ordered by the corresponding variable to be analyzed and stored in a new dataframe.
  2. In the new dataframe the information of the 20 countries with the highest values and the 20 countries with the lowest values are kept, the rest is discarded (row filtering).
  3. In the new dataframe only variables of interest are kept, the rest is discarded (column filtering). An additional column is created with the numeric ranking of each country.
  4. The data of the rankings is displayed as tables and bar graphs.

Ranking of countries by total cases of COVID-19 with column total_cases

# Data is ordered according to the total_cases column
ranking.total_cases <- covid.total.df[order(-covid.total.df$total_cases),]
# The row corresponding to World is removed
ranking.total_cases <- ranking.total_cases[ranking.total_cases$location != "World", ]
# A new column indicating the positionin the rank is added
ranking.total_cases$position <- 1:nrow(ranking.total_cases)
# Only columns of interest are kept
columnfilter <- c("position", "location", "total_cases")
bottom20.total_cases <- tail(ranking.total_cases[, columnfilter],20)
top20.total_cases <- head(ranking.total_cases[, columnfilter],20)
rm(ranking.total_cases)
rownames(top20.total_cases) <- c()
rownames(bottom20.total_cases) <- c()

mexrow <- which(top20.total_cases$location=='Mexico')
top20.total_cases$total_cases_formated <- formatC(top20.total_cases$total_cases, 
                                                  format="f", big.mark=",", digits=0)
bottom20.total_cases$total_cases_formated <- formatC(bottom20.total_cases$total_cases, 
                                                     format="f", big.mark=",", digits=0)

Tables

tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most cases") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_cases[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least cases") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most cases
Position Country Total cases
1 United States 5,361,165
2 Brazil 3,317,096
3 India 2,589,682
4 Russia 917,884
5 South Africa 583,653
6 Peru 525,803
7 Mexico 517,714
8 Colombia 456,689
9 Chile 383,902
10 Iran 341,070
11 United Kingdom 316,367
12 Saudi Arabia 297,315
13 Pakistan 288,717
14 Argentina 282,424
15 Bangladesh 274,525
16 Italy 253,438
17 Turkey 248,117
18 Germany 223,453
19 France 215,521
20 Iraq 172,583
Countries with least cases
Position Country Total cases
136 Burkina Faso 1,249
137 Botswana 1,214
138 Niger 1,165
139 Togo 1,130
140 Jamaica 1,082
141 Chad 952
142 Vietnam 934
143 Lesotho 903
144 Tanzania 509
145 Taiwan 484
146 Trinidad and Tobago 474
147 Burundi 412
148 Myanmar 374
149 Mauritius 344
150 Mongolia 298
151 Eritrea 285
152 Cambodia 273
153 Papua New Guinea 271
154 Timor 25
155 Laos 20

Top 20 graph

ggplot(data=top20.total_cases, aes(x=reorder(paste(position, location),total_cases), 
                                   y=total_cases, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19") +
  geom_text(aes(y=max(total_cases)+170000, 
                label=total_cases_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 countries with most cases of  COVID-19", 
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(100000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
                     label=c("100k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total cases of COVID-19 with column total_cases - American countries

This ranking is done with American countries only. The data is obtained by filtering the rows where the continent variable is either “North America” or “South America”.

ranking.total_cases.america <- covid.total.df[covid.total.df$continent %in% 
                                                c("North America", "South America") ,]
ranking.total_cases.america <- ranking.total_cases.america[order(-ranking.total_cases.america$total_cases),]
ranking.total_cases.america$position <- 1:nrow(ranking.total_cases.america)
bottom20.total_cases.america <- tail(ranking.total_cases.america[, c("position", "location", "total_cases")],5)
top20.total_cases.america <- head(ranking.total_cases.america[, c("position", "location", "total_cases")],20)
rownames(bottom20.total_cases.america) <- c()
rownames(top20.total_cases.america) <- c()
mexrow <- which(ranking.total_cases.america$location=='Mexico')
rm(ranking.total_cases.america)
top20.total_cases.america$total_cases_formated <- formatC(top20.total_cases.america$total_cases,
                                                          format="f", big.mark=",", digits=0)
bottom20.total_cases.america$total_cases_formated <- formatC(bottom20.total_cases.america$total_cases, 
                                                             format="f", big.mark=",", digits=0)

Table

tablecolnames <- c("Position", "Country", "Total cases")
kable(top20.total_cases.america[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries of the American Continent with most cases") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "center") %>%
row_spec(mexrow, bold = T, color = "black", background = "yellow")
Countries of the American Continent with most cases
Position Country Total cases
1 United States 5,361,165
2 Brazil 3,317,096
3 Peru 525,803
4 Mexico 517,714
5 Colombia 456,689
6 Chile 383,902
7 Argentina 282,424
8 Canada 121,889
9 Ecuador 100,688
10 Bolivia 99,146
11 Dominican Republic 85,545
12 Panama 80,665
13 Guatemala 62,313
14 Honduras 49,979
15 Venezuela 32,607
16 Costa Rica 27,737
17 Puerto Rico 25,695
18 El Salvador 22,619
19 Paraguay 9,381
20 Haiti 7,831

Top 20 graph

ggplot(data=top20.total_cases.america, aes(x=reorder(paste(position, location),total_cases),
                                           y=total_cases, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19") +
  geom_text(aes(y=max(total_cases)+170000, 
                label=format(total_cases, big.mark=","),
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of American countries with most cases of COVID-19", 
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(100000, 250000, 500000, 750000, 1000000, 2000000, 3000000, 4000000, 5000000),
                     label=c("100k", "250k", "500k", "750k", "1m", "2m", "3m", "4m", "5m"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total cases of COVID-19 per million inhabitants with column total_cases_per_million

ranking.total_cases_per_million <- covid.total.df[order(-covid.total.df$total_cases_per_million),]
ranking.total_cases_per_million$position <- 1:nrow(ranking.total_cases_per_million)
columnfilter <- c("position", "location", "total_cases_per_million")
mexico.total_cases_per_million <- ranking.total_cases_per_million[
                                  ranking.total_cases_per_million$location == "Mexico", ]
bottom20.total_cases_per_million <- tail(ranking.total_cases_per_million[, columnfilter],20)
top20.total_cases_per_million <- head(ranking.total_cases_per_million[, columnfilter],20)
mexico.total_cases_per_million <- mexico.total_cases_per_million[, columnfilter]
rm(ranking.total_cases_per_million)
rownames(top20.total_cases_per_million) <- c()
rownames(bottom20.total_cases_per_million) <- c()
rownames(mexico.total_cases_per_million) <- c()

top20.total_cases_per_million <- rbind(top20.total_cases_per_million, mexico.total_cases_per_million)
mexrow <- which(top20.total_cases_per_million$location=='Mexico')

top20.total_cases_per_million$total_cases_per_million_formated <- formatC(
  top20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)
bottom20.total_cases_per_million$total_cases_per_million_formated<- formatC(
  bottom20.total_cases_per_million$total_cases_per_million, format="f", big.mark=",", digits=2)

Tables

tablecolnames <- c("Position", "country", "Cases pmi")
kable(top20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most cases per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_cases_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least cases per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most cases per million inhabitants (pmi)
Position country Cases pmi
1 Qatar 39,849.57
2 Bahrain 27,286.36
3 Chile 20,082.54
4 Panama 18,695.10
5 Kuwait 17,725.30
6 Oman 16,238.52
7 United States 16,196.74
8 Peru 15,947.03
9 Brazil 15,605.50
10 Armenia 14,003.28
11 Israel 10,668.66
12 South Africa 9,840.94
13 Singapore 9,514.14
14 Puerto Rico 8,981.63
15 Colombia 8,975.30
16 Saudi Arabia 8,540.13
17 Bolivia 8,493.60
18 Sweden 8,346.54
19 Dominican Republic 7,885.85
20 Moldova 7,413.31
43 Mexico 4,015.38
Countries with least cases per million inhabitants (pmi)
Position country Cases pmi
137 Mongolia 90.90
138 Mozambique 89.30
139 Eritrea 80.36
140 Yemen 62.43
141 China 62.03
142 Burkina Faso 59.75
143 Chad 57.96
144 Angola 56.35
145 Thailand 48.38
146 Niger 48.13
147 Burundi 34.65
148 Uganda 31.35
149 Papua New Guinea 30.29
150 Taiwan 20.32
151 Timor 18.96
152 Cambodia 16.33
153 Vietnam 9.60
154 Tanzania 8.52
155 Myanmar 6.87
156 Laos 2.75

Top 20 graph

ggplot(data=top20.total_cases_per_million, 
       aes(x=reorder(paste(position, location),total_cases_per_million), 
           y=total_cases_per_million, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total cases of COVID-19 per million inhabitants") +
  geom_text(aes(y=max(total_cases_per_million)+1250, 
                label=total_cases_per_million_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of countries with most cases of COVID-19 per million inhabitants (+ México)",
       subtitle = todayformatted,
       caption = plotscaption)+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total deaths from COVID-19 with the variable total_deaths

ranking.total_deaths <- covid.total.df[order(-covid.total.df$total_deaths),]
ranking.total_deaths <- ranking.total_deaths[ranking.total_deaths$location != "World", ]
ranking.total_deaths$position <- 1:nrow(ranking.total_deaths)
columnfilter <- c("position", "location", "total_deaths")
bottom20.total_deaths <- tail(ranking.total_deaths[, columnfilter],20)
top20.total_deaths <- head(ranking.total_deaths[, columnfilter],20)
mexrow <- which(ranking.total_deaths$location=='Mexico')
rm(ranking.total_deaths)
rownames(top20.total_deaths) <- c()
rownames(bottom20.total_deaths) <- c()

top20.total_deaths$total_deaths_formated <- formatC(top20.total_deaths$total_deaths, 
                                                  format="f", big.mark=",", digits=0)
bottom20.total_deaths$total_deaths_formated <- formatC(bottom20.total_deaths$total_deaths, 
                                                     format="f", big.mark=",", digits=0)

Tables

tablecolnames <- c("Position", "Country", "Total deaths")
kable(top20.total_deaths[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with most deaths from COVID-19") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_deaths[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, caption="Countries with least deaths from COVID-19") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most deaths from COVID-19
Position Country Total deaths
1 United States 169,481
2 Brazil 107,232
3 Mexico 56,543
4 India 49,980
5 United Kingdom 41,358
6 Italy 35,392
7 France 30,409
8 Peru 26,075
9 Iran 19,492
10 Russia 15,617
11 Colombia 14,810
12 South Africa 11,677
13 Chile 10,395
14 Belgium 9,935
15 Germany 9,231
16 Canada 9,024
17 Pakistan 6,168
18 Netherlands 6,160
19 Indonesia 6,071
20 Ecuador 6,065
Countries with least deaths from COVID-19
Position Country Total deaths
136 Tanzania 21
137 Mozambique 19
138 Georgia 17
139 Jamaica 14
140 Uganda 13
141 Jordan 11
142 Sri Lanka 11
143 Mauritius 10
144 Trinidad and Tobago 10
145 Rwanda 8
146 Taiwan 7
147 Myanmar 6
148 Botswana 3
149 Papua New Guinea 3
150 Burundi 1
151 Cambodia 0
152 Eritrea 0
153 Laos 0
154 Mongolia 0
155 Timor 0

Top 20 graph

ggplot(data=top20.total_deaths, 
       aes(x=reorder(paste(position, location),total_deaths), 
                                   y=total_deaths, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Total deaths from COVID-19") +
  geom_text(aes(y=max(total_deaths)+7500, 
                label=total_deaths_formated,
                fontface="bold"), 
            color="black")+
  labs(title="Top 20 of countries with total deaths from COVID-19",
       subtitle = todayformatted,
       caption = plotscaption)+
  scale_y_continuous(breaks=c(10000, 25000, 50000, 75000, 100000, 150000, 170000), 
                     label=c("10k", "25k", "50k", "75k", "100k", "150k", "170k"))+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Ranking of countries by total deaths from COVID-19 per million inhabitants using the variable total_deaths_per_million

ranking.total_deaths_per_million <- covid.total.df[order(-covid.total.df$total_deaths_per_million),]
ranking.total_deaths_per_million$position <- 1:nrow(ranking.total_deaths_per_million)
columnfilter <- c("position", "location", "total_deaths_per_million")
bottom20.total_deaths_per_million <- tail(ranking.total_deaths_per_million[, columnfilter],20)
top20.total_deaths_per_million <- head(ranking.total_deaths_per_million[, columnfilter],20)
mexrow <- which(ranking.total_deaths_per_million$location=='Mexico')
rm(ranking.total_deaths_per_million)
rownames(top20.total_deaths_per_million) <- c()
rownames(bottom20.total_deaths_per_million) <- c()

top20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
  top20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)
bottom20.total_deaths_per_million$total_deaths_per_million_formated <- formatC(
  bottom20.total_deaths_per_million$total_deaths_per_million, format="f", big.mark=",", digits=2)

Table

tablecolnames <- c("Position", "Country", "Deaths pmi")

kable(top20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, 
      caption="Countries with most deaths from COVID-19 per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "float_left") %>%
  row_spec(mexrow, bold = T, color = "black", background = "yellow")

kable(bottom20.total_deaths_per_million[,c(1,2,4)], format = "html", align = "clr", 
      col.names=tablecolnames, 
      caption="Countries with least deaths from COVID-19 per million inhabitants (pmi)") %>% 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), 
                full_width = F, position = "right")
Countries with most deaths from COVID-19 per million inhabitants (pmi)
Position Country Deaths pmi
1 Belgium 857.23
2 Peru 790.83
3 United Kingdom 609.23
4 Italy 585.36
5 Sweden 572.62
6 Chile 543.78
7 United States 512.02
8 Brazil 504.48
9 France 465.87
10 Mexico 438.55
11 Panama 404.66
12 Netherlands 359.50
13 Ireland 359.27
14 Ecuador 343.76
15 Bolivia 342.93
16 Colombia 291.06
17 Armenia 275.71
18 Macedonia 258.71
19 Canada 239.10
20 Iran 232.07
Countries with least deaths from COVID-19 per million inhabitants (pmi)
Position Country Deaths pmi
137 Angola 2.62
138 Burkina Faso 2.58
139 Botswana 1.28
140 Jordan 1.08
141 Thailand 0.83
142 Rwanda 0.62
143 Mozambique 0.61
144 Sri Lanka 0.51
145 Tanzania 0.35
146 Papua New Guinea 0.34
147 Taiwan 0.29
148 Uganda 0.28
149 Vietnam 0.23
150 Myanmar 0.11
151 Burundi 0.08
152 Cambodia 0.00
153 Eritrea 0.00
154 Laos 0.00
155 Mongolia 0.00
156 Timor 0.00

Top 20 graph

ggplot(data=top20.total_deaths_per_million, 
       aes(x=reorder(paste(position, location),total_deaths_per_million), 
           y=total_deaths_per_million, fill=location))+
  geom_bar(stat = "identity", position=position_dodge(), colour="black", show.legend = FALSE)+
  ylab("Deaths from COVID-19 per million inhabitants") +
  geom_text(aes(y=max(total_deaths_per_million)+25, 
                label=total_deaths_per_million_formated,
                fontface="bold"),
            color="black")+
  labs(title="Top 20 countries by deaths from COVID-19 per million inhabitants",
       subtitle = todayformatted,
       caption = plotscaption)+
  coord_flip() +
  xlab("Countries") +
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Cumulative COVID-19 Cases and Deaths

For the following graphs, the variables total_cases and total_deaths are used.

Curves of total cases of the five countries with most cases (+ México)

Line graphs with date ranges. The starting date varies, whereas the end date is always Sunday, August 16, 2020.

Functions to build the line graphs.

##Line plot of new_total_cases with date breaks by month
plot.trend.total_cases.month <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "month", date_labels = "%B")+
    ggtitle(graphtitle,
            subtitle = graphsubtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative cases of COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}
##Line plot of new_deaths with date breaks by short month and day
plot.trend.total_cases.monthdays <- function(startdate, enddate, countries, graphtitle, graphsubtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_cases, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Países")+
    scale_x_date(date_breaks = "day", date_labels = "%b %d")+
    ggtitle(graphtitle,
            subtitle = graphsubtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative cases of COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}

All

sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative cases of COVID-19"
plot.trend.total_cases.month(startdate, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-5 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-3 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-2 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-1 Month

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_cases.month(startdate.tmp, enddate, top5.total_cases, title.tmp, sublabel.tmp)

-1 Month TOP[3:7]

The amount of cases of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with most cases are added.

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_cases.others <- as.character(top20.total_cases$location)[3:7]
plot.trend.total_cases.month(startdate.tmp, enddate, total_cases.others, title.tmp, sublabel.tmp)

-3 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-21, enddate, total_cases.others, title.tmp, sublabel.tmp)

-2 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)

-2 Weeks MX, PE y SA

total_cases.others <- as.character(top20.total_cases$location)[3:7]
total_cases.others <- total_cases.others[c(-1,-2)]
countrieslabels <- paste(total_cases.others, collapse = ', ')
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
title.tmp <- paste(title.tmp, "of", countrieslabels)
plot.trend.total_cases.monthdays(enddate-14, enddate, total_cases.others, title.tmp, sublabel.tmp)

Curves of daily deaths of the five countries with most deaths from COVID-19 (+ México)

Line graphs with date ranges. The starting date varies, whereas the end date is always Sunday, August 16, 2020.

Functions to build the line graphs.

# Generate total_deaths line plot with month breaks
plot.trend.total_deaths.month <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "month", date_labels = "%B")+
    ggtitle(graphtitle,
            subtitle = subgraphtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative deaths from COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}
# Generate total_deaths line plot with month and day breaks
plot.trend.total_deaths.monthdays <- function(startdate, enddate, countries, graphtitle, subgraphtitle){
  tmp.df <- covid.df[covid.df$location %in% countries,]
  tmp.df <- tmp.df[tmp.df$date >= startdate,]
  tmp.df <- tmp.df[tmp.df$date <= enddate,]
  ggplot(data=tmp.df , aes(x=date, y=total_deaths, group=location, colour=location)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Countries")+
    scale_x_date(date_breaks = "day", date_labels = "%b %d")+
    ggtitle(graphtitle,
            subtitle = subgraphtitle)+
    labs(caption = plotscaption)+
    ylab("Cumulative deaths from COVID-19")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"),
        legend.position = "bottom",
        legend.title = element_text(size = 14),
        legend.text = element_text(size = 13),
        legend.key.width = unit(1.5,"cm"))+
        guides(colour = guide_legend(override.aes = list(size=2)))
}

All

top5.total_deaths <-  head(as.character(top20.total_deaths$location),5)
sublabel.tmp <- paste("Date range:", format(startdate, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
title.tmp <- "Cumulative deaths from COVID-19"
plot.trend.total_deaths.month(startdate, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-5 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-5
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-3 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-3
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-2 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-2
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-1 Months

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
plot.trend.total_deaths.month(startdate.tmp, enddate, top5.total_deaths, title.tmp, sublabel.tmp)

-1 Month TOP[3:7]

The amount of deaths of United States and Brazil do not allow to appreciate in detail the curves of the rest of the countries. Those two countries are removed from the graph, the countries in positions 6 and 7 of the top 20 of countries with the most total deaths are added.

mes.tmp <- as.numeric(format(enddate, "%m"))-1
mes.tmp <- ifelse(mes.tmp < 10, paste0("0",mes.tmp), mes.tmp)
startdate.tmp <- paste0("2020-", mes.tmp, "-01")
startdate.tmp <- as.Date(startdate.tmp)
sublabel.tmp <- paste("Date range:", format(startdate.tmp, "%m/%d/%Y"),"-", format(enddate, "%m/%d/%Y"))
total_deaths.others <- as.character(top20.total_deaths$location)[3:7]
plot.trend.total_deaths.month(startdate.tmp, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-3 Weeks TOP[3:7]

sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-3 Weeks IN, IR, MX, PE, RU

The acumulative curves of France, Italy, and the United Kindgdom are almost flat, the number of deaths from COVID-19 has stagnated. One possible reason is that the pandemic started first in Europe and the number of recently reported deaths is zero or very low. The curves for Mexico and India continues to rise. The countries of France, Italy, and the United Kingdom are discarded from the data. Other countries are added where it seems that the number of deaths from COVID-19 is still increasing. The graph is generated with the accumulated death curves of the last 3 weeks of India, Iran, Mexico, Peru and Russia:

total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-21, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-21, enddate, total_deaths.others, title.tmp, sublabel.tmp)

-2 Weeks IN, IR, MX, PE, RU

total_deaths.others <- as.character(top20.total_deaths$location)[c(3,5,8,9,10)]
countrieslabels <- paste(total_deaths.others, collapse = ', ')
title.tmp <- paste(title.tmp, "de", countrieslabels)
sublabel.tmp <- paste("Date range:", format(enddate-14, "%d/%m/%Y"),"-", format(enddate, "%d/%m/%Y"))
plot.trend.total_deaths.monthdays(enddate-14, enddate, total_deaths.others, title.tmp, sublabel.tmp)

Comparison of COVID-19 cases and deaths curves in Mexico

Daily cases and deaths

covid.mexico <- covid.df[covid.df$location=='Mexico',]
covid.mexico <- covid.mexico[covid.mexico$date > "2020-04-01",]
covid.mexico.new <- covid.mexico[,c("date", "new_cases", "new_deaths")]
covid.mexico.new.lf <-melt(covid.mexico.new, id.vars = c("date"))
ggplot(data=covid.mexico.new.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    ggtitle("COVID-19 Daily cases and deaths in México")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Daily cases and deaths last month

covid.mexico.new.lastmonth <- covid.mexico.new[covid.mexico.new$date >= enddate-30, ]
covid.mexico.new.lastmonth.lf <- melt(covid.mexico.new.lastmonth, id.vars = c("date"))
ggplot(data=covid.mexico.new.lastmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    ggtitle("COVID-19 Daily cases and deaths in México - Last month")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Total cases and deaths

covid.mexico.total <- covid.mexico[,c("date", "total_cases", "total_deaths")]
covid.mexico.total.lf <-melt(covid.mexico.total, id.vars = c("date"))
ggplot(data=covid.mexico.total.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
                     label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
    ggtitle("COVID-19 Cumulative cases and deaths in México")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Date")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

Total cases and deaths last month

covid.mexico.total.lasttmonth <- covid.mexico.total[covid.mexico.total$date >= enddate-30, ]
covid.mexico.total.lasttmonth.lf <- melt(covid.mexico.total.lasttmonth, id.vars = c("date"))
ggplot(data=covid.mexico.total.lasttmonth.lf , aes(x=date, y=value, group=variable, colour=variable)) +
    geom_line(size=1) +
    scale_color_discrete(name = "Indicator", labels=c("Cases", "Deaths"))+
    scale_x_date(date_breaks = "week", date_labels = "%b %d")+
    scale_y_continuous(breaks=c(50000, 100000, 200000, 300000, 400000, 500000, 750000),
                     label=c("50k","100k", "200k", "300k", "400k", "500k", "750k"))+
    ggtitle("COVID-19 Cumulative cases and deaths in México - Last month")+
    labs(caption = plotscaption)+
    ylab("")+
    xlab("Fecha")+
    theme_bw()+
    theme(title = element_text(size=14, face="bold", colour = "black"),
          axis.text.y = element_text(size=11, face="bold", colour = "black"), 
          axis.text.x = element_text(size=11, face="bold", colour = "black"),
          legend.position = "bottom",
          legend.title = element_text(size = 14),
          legend.text = element_text(size = 13),
          legend.key.width = unit(1.5,"cm"))+
    guides(colour = guide_legend(override.aes = list(size=2)))

COVID-19 cases and deaths in México per week

covid.mexico <- covid.df[covid.df$location=='Mexico',]
week.agg <- covid.mexico %>%
  group_by(week = week(date)) %>%
  summarise(mean_new_cases = mean(new_cases),
            n = n(), 
            loCI_new_cases = ci(new_cases)[2], 
            hiCI_new_cases = ci(new_cases)[3], 
            mean_new_deaths = mean(new_deaths), 
            loCI_new_deaths = ci(new_deaths)[2], 
            hiCI_new_deaths = ci(new_deaths)[3], 
            total_new_cases = sum(new_cases), 
            total_new_deaths = sum(new_deaths))
# Weeks with zero mean new cases are deleted
week.agg <- week.agg[which(week.agg$mean_new_cases > 0),]
# Finding the date of the first week to be ploted
firstweek <- min(week.agg$week)
rowindex <- match(firstweek, week(covid.mexico[,"date"]))
firstweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(firstweek, rowindex)
# Finding the date of the last week to be ploted
lastweek <- max(week.agg$week)
rowindex <- match(lastweek, week(covid.mexico[,"date"]))
lastweek.date <- format(covid.mexico[rowindex, ]$date, "%d/%m/%Y")
rm(lastweek, rowindex)
sublabel <- paste("Date range:", firstweek.date, "-", lastweek.date)

Mean cases per week

ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_cases))+
  geom_bar(stat = 'identity', aes(fill = mean_new_cases)) +
  geom_errorbar(aes(ymin=loCI_new_cases, ymax=hiCI_new_cases), width=.2,
                position=position_dodge(.9)) +
  xlab("Week")+
  ylab("Mean cases per week ± CI 95%")+
  labs(title="Mean cases per week of COVID-19 in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Cases", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Mean deaths per week

ggplot(data=week.agg, aes(x=as.character(week),y=mean_new_deaths))+
  geom_bar(stat = 'identity', aes(fill = mean_new_deaths)) +
  geom_errorbar(aes(ymin=loCI_new_deaths, ymax=hiCI_new_deaths), width=.2,
                position=position_dodge(.9)) +
  xlab("Week")+
  ylab("Mean deaths per week ± CI 95%")+
  labs(title="Mean deaths per week from COVID-19 in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Deaths", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Total cases per week

ggplot(data=week.agg, aes(x=as.character(week),y=total_new_cases))+
  geom_bar(stat = 'identity', aes(fill = total_new_cases)) +
  xlab("Week")+
  ylab("Total cases per week")+
  labs(title="Total COVID-19 cases per week in México",
       subtitle = sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Total cases", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))

Total deaths per week

ggplot(data=week.agg, aes(x=as.character(week),y=total_new_deaths))+
  geom_bar(stat = 'identity', aes(fill = total_new_deaths)) +
  xlab("Week")+
  ylab("Total deaths per week")+
  labs(title="Total COVID-19 deaths per week in México",
       subtitle=sublabel,
       caption = plotscaption)+
  scale_fill_gradient(name="Total deaths", low = "green", high = "red")+
  theme_bw()+
  theme(title = element_text(size=14, face="bold", colour = "black"),
        axis.text.y = element_text(size=11, face="bold", colour = "black"),
        axis.text.x = element_text(size=11, face="bold", colour = "black"))
## [1] "es_MX.UTF-8"